﻿
using System;
using System.Collections.Generic;
using System.Linq;
using CatEars.Core.Collections;

namespace CatEars.Text.FastFind
{
    /// <summary>
    /// 快速搜索子串（长串在短串集合中搜索）
    /// </summary>
    public class FastFindSubStr
    {
        /// <summary>
        /// 索引模式
        /// </summary>
        public FastFindIndexMode IndexMode { get; private set; }
        /// <summary>
        /// FastFindSubStrResult中Tag的取值方式
        /// </summary>
        public FastFindResultTagMode TagMode { get; private set; }
        /// <summary>
        /// FastFindSubStrResult中MatchIndex的取值方式
        /// </summary>
        public FastFindResultIndexMode ResultIndexMode { get; private set; }

        /// <summary>
        /// 索引深度
        /// </summary>
        public int Depth { get; private set; }

        /// <summary>
        /// 当备选子串小于设置的值时，不再从索引中匹配(不能输入负数)
        /// </summary>
        public int SimpleCheckLimit { get; set; }

        /// <summary>
        /// 字符串
        /// </summary>
        List2<StringItem> _list;
        /// <summary>
        /// 单个字索引
        /// </summary>
        SingleLinkedNode<int>[] _singleCache = new SingleLinkedNode<int>[char.MaxValue];
        /// <summary>
        /// 多字索引
        /// </summary>
        SingleLinkedNode<CharItem>[][] _charCacheAsc;
        /// <summary>
        /// 多字索引
        /// </summary>
        SingleLinkedNode<CharItem>[][] _charCacheDesc;

        /// <summary>
        /// 初始化
        /// </summary>
        /// <param name="intDepth">索引深度 越大越占内存</param>
        /// <param name="iMode">索引构建模式</param>
        /// <param name="tMode">FastFindSubStrResult中Tag的取值方式</param>
        /// <param name="rMode">FastFindSubStrResult中MatchIndex的取值方式</param>
        /// <param name="intSubCapacity">内部容器预置容量</param>
        public FastFindSubStr(
            int intDepth = 2,
            FastFindIndexMode iMode = FastFindIndexMode.Asc | FastFindIndexMode.Desc,
            FastFindResultTagMode tMode = FastFindResultTagMode.Many,
            FastFindResultIndexMode rMode = FastFindResultIndexMode.Start,
            int intSubCapacity = 0)
        {
            SimpleCheckLimit = 5;
            Depth = Math.Max(2, Math.Min(20, intDepth));
            IndexMode = iMode;
            TagMode = tMode;
            ResultIndexMode = rMode;
            _list = new List2<StringItem>(intSubCapacity);

            if (IndexMode == FastFindIndexMode.None) throw new NotSupportedException("iMode不能取值为None");
            if ((IndexMode & FastFindIndexMode.Asc) > 0)
            {
                _charCacheAsc = new SingleLinkedNode<CharItem>[Depth][];
                for (int i = 0; i < _charCacheAsc.Length; i++)
                {
                    _charCacheAsc[i] = new SingleLinkedNode<CharItem>[char.MaxValue];
                }
            }
            if ((IndexMode & FastFindIndexMode.Desc) > 0)
            {
                _charCacheDesc = new SingleLinkedNode<CharItem>[Depth][];
                for (int i = 0; i < _charCacheDesc.Length; i++)
                {
                    _charCacheDesc[i] = new SingleLinkedNode<CharItem>[char.MaxValue];
                }
            }
        }

        /// <summary>
        /// 添加快速搜索子串
        /// </summary>
        /// <param name="strValue">值</param>
        /// <param name="objTag">附加对象</param>
        public void AddValue(string strValue, object objTag = null)
        {
            if (string.IsNullOrEmpty(strValue)) return;
            StringItem stringItem = new StringItem()
            {
                Value = strValue,
                Tag = objTag,
            };
            int intID = _list.Count;
            _list.Add(stringItem);
            if (strValue.Length == 1)
            {
                char chr = strValue[0];
                //单向链表，后进去的在前面
                var item = _singleCache[chr];
                _singleCache[chr] = new SingleLinkedNode<int>() { Value = intID, Next = item };
            }
            else
            {
                int intTo = Math.Min(Depth, strValue.Length);
                for (int i = 0; i < intTo; i++)
                {
                    AddCharToColl(intID, i, strValue);
                }
            }
        }

        /// <summary>
        /// 添加字符索引到集合
        /// </summary>
        /// <param name="intID">ID</param>
        /// <param name="intIndex">取哪个字符</param>
        /// <param name="strValue">完整字符串</param>
        private void AddCharToColl(int intID, int intIndex, string strValue)
        {
            int intLen = strValue.Length;
            //结构体初始化一个就可以了
            CharItem charItem = new CharItem()
            {
                ID = intID,
                Length = intLen,
            };
            if (_charCacheAsc != null)
            {
                var coll = _charCacheAsc[intIndex];
                int intIndex0 = intIndex;
                char chrC = strValue[intIndex0];
                charItem.Index = intIndex;

                var item = coll[chrC];
                coll[chrC] = new SingleLinkedNode<CharItem>() { Value = charItem, Next = item };
            }
            if (_charCacheDesc != null)
            {
                var coll = _charCacheDesc[intIndex];
                int intIndex0 = intLen - intIndex - 1;
                char chrC = strValue[intIndex0];
                charItem.Index = intIndex;

                var item = coll[chrC];
                coll[chrC] = new SingleLinkedNode<CharItem>() { Value = charItem, Next = item };
            }
        }

        /// <summary>
        /// 搜索开头子串，指定位置
        /// </summary>
        /// <param name="str"></param>
        /// <param name="coll"></param>
        /// <param name="intOffset"></param>
        /// <param name="blnReserve"></param>
        /// <param name="sortMode"></param>
        /// <returns></returns>
        private ICollection<FastFindSubStrResult> FindByIndex(
            string str,
            SingleLinkedNode<CharItem>[][] coll,
            int intOffset,
            bool blnReserve,
            FastFindResultSortMode sortMode)
        {
            FastFindSubStrResult singleResult = null;
            ICollection<FastFindSubStrResult> result = null;

            string strValue = str;
            char chrC0 = strValue[intOffset];
            var charNode = coll[0][chrC0];
            int intTo = Math.Min(Depth, strValue.Length - intOffset);
            if (charNode == null || intTo == 1)
            {
                if (FindByIndex_SingleWord(chrC0, intOffset, ref singleResult))
                {
                    result = new FastFindSubStrResult[] { singleResult };
                }
            }
            else
            {
                List<FastFindSubStrResult> result0 = new List<FastFindSubStrResult>();
                result = result0;

                //定义3个HashSet [0]前面一次的结果 [1]当前字符的结果 [2]确定存在的结果 配合FindByIndex_CalcIDFromNode方法使用
                HashSet<int>[] idColls = new HashSet<int>[3];
                idColls[0] = new HashSet<int>();
                idColls[1] = new HashSet<int>();
                idColls[2] = new HashSet<int>();
                //intCharIndex 记录有几个字已经验证过
                int intCharIndex = 0;
                if (!FindByIndex_CalcIDFromNode(charNode, idColls, intCharIndex++))
                {
                    if (idColls[1].Count == 0)
                    {
                        //一个字都验证不过的，直接跳出（单个字在前面一个分支验证过了）
                        goto CheckSingleWord;
                    }
                    else
                    {
                        goto BuildResult;
                    }
                }
                for (; intCharIndex < intTo; intCharIndex++)
                {
                    char chrC2 = strValue[intCharIndex + intOffset];
                    charNode = coll[intCharIndex][chrC2];
                    if (!FindByIndex_CalcIDFromNode(charNode, idColls, intCharIndex))
                    {
                        intCharIndex++;
                        break;
                    }
                }
            BuildResult:
                FindByIndex_BuildResult(result0, idColls[2], idColls[1], strValue, intCharIndex, intOffset, blnReserve);
            CheckSingleWord:
                if (FindByIndex_SingleWord(chrC0, intOffset, ref singleResult))
                {
                    if (result0.Count == 0) result = new FastFindSubStrResult[] { singleResult };
                    else result0.Add(singleResult);
                }
            }
            if (result == null || result.Count == 0) return EmptyArray<FastFindSubStrResult>.Array;
            if (result.Count == 1) return result;
            return SortResult(result, sortMode);
        }

        /// <summary>
        /// 搜索单字子串，指定位置
        /// </summary>
        /// <param name="chr"></param>
        /// <param name="intOffset"></param>
        /// <param name="result"></param>
        /// <returns></returns>
        private bool FindByIndex_SingleWord(char chr, int intOffset, ref FastFindSubStrResult result)
        {
            var node = _singleCache[chr];
            if (node == null)
            {
                return false;
            }
            StringItem stringItem;
            if (TagMode == FastFindResultTagMode.None)
            {
                stringItem = _list[node.Value];
                result = new FastFindSubStrResult(stringItem.Value, EmptyArray.ObjectArray, intOffset);
            }
            else if (TagMode == FastFindResultTagMode.Single)
            {
                stringItem = _list[node.Value];
                result = new FastFindSubStrResult(stringItem.Value, new object[] { stringItem.Tag }, intOffset);
            }
            else
            {
                HashSet<object> tags = new HashSet<object>();
                do
                {
                    stringItem = _list[node.Value];
                    tags.Add(stringItem.Tag);
                    node = node.Next;
                }
                while (node != null);
                result = new FastFindSubStrResult(stringItem.Value, tags.ToArray(), intOffset);
            }
            return true;
        }
        /// <summary>
        /// 从node计算ID到HashSet中
        /// </summary>
        /// <param name="node"></param>
        /// <param name="idColls"></param>
        /// <param name="intIndex"></param>
        /// <returns>false表示不需要再在索引中验证字符串</returns>
        private bool FindByIndex_CalcIDFromNode(
            SingleLinkedNode<CharItem> node,
            HashSet<int>[] idColls,
            int intIndex)
        {
            var hs1 = idColls[0];
            hs1.Clear();
            idColls[0] = idColls[1];
            idColls[1] = hs1;
            var hs0 = idColls[0];
            while (node != null)
            {
                var value = node.Value;
                int intID = value.ID;
                if (intIndex == 0 || hs0.Contains(intID))
                {
                    if (value.Index + 1 == value.Length)
                    {
                        //是最后一个字符满足条件，当作后面字符都满足条件
                        idColls[2].Add(intID);
                    }
                    else
                    {
                        hs1.Add(intID);
                    }
                }
                node = node.Next;
            }
            if (hs1.Count <= SimpleCheckLimit) return false;
            return true;
        }

        /// <summary>
        /// 根据ID生成结果
        /// </summary>
        /// <param name="result0"></param>
        /// <param name="idCollSure">确认了不需要验证的</param>
        /// <param name="idCollNotSure">还需要进一步验证的</param>
        /// <param name="strValue"></param>
        /// <param name="intIgnoreCheckWord">可以忽略验证几个字</param>
        /// <param name="intOffset"></param>
        /// <param name="blnReserve"></param>
        private void FindByIndex_BuildResult(
            List<FastFindSubStrResult> result0,
            HashSet<int> idCollSure,
            HashSet<int> idCollNotSure,
            string strValue,
            int intIgnoreCheckWord,
            int intOffset,
            bool blnReserve)
        {
            if (idCollSure.Count + idCollNotSure.Count == 0)
            {
                return;
            }
            Dictionary<string, SingleLinkedNode<object>>
                uniqueCache = new Dictionary<string, SingleLinkedNode<object>>();
            foreach (int intID in idCollSure)
            {
                var item = _list[intID];
                string strMatch = item.Value;
                SingleLinkedNode<object> node;
                uniqueCache.TryGetValue(strMatch, out node);
                uniqueCache[strMatch] = new SingleLinkedNode<object>() { Value = item.Tag, Next = node };
            }
            int intTmp = intOffset + intIgnoreCheckWord;
            foreach (int intID in idCollNotSure)
            {
                var item = _list[intID];
                string strMatch = item.Value;
                SingleLinkedNode<object> node;
                if (uniqueCache.TryGetValue(strMatch, out node))
                {
                    //缓存中已经有了
                    uniqueCache[strMatch] = new SingleLinkedNode<object>() { Value = item.Tag, Next = node };
                }
                else
                {
                    if (!MatchSubString(strValue, intTmp, strMatch, intIgnoreCheckWord, blnReserve))
                    {
                        continue;
                    }
                    uniqueCache[strMatch] = new SingleLinkedNode<object>() { Value = item.Tag };
                }
            }
            if (TagMode == FastFindResultTagMode.None)
            {
                foreach (var kv in uniqueCache)
                {
                    var result = new FastFindSubStrResult(kv.Key, EmptyArray.ObjectArray, intOffset);
                    result0.Add(result);
                }
            }
            else if (TagMode == FastFindResultTagMode.Single)
            {
                foreach (var kv in uniqueCache)
                {
                    var result = new FastFindSubStrResult(kv.Key, new object[] { kv.Value.Value }, intOffset);
                    result0.Add(result);
                }
            }
            else
            {
                HashSet<object> hsUnique = new HashSet<object>();
                foreach (var kv in uniqueCache)
                {
                    hsUnique.Clear();
                    var vNode = kv.Value;
                    while (vNode != null)
                    {
                        hsUnique.Add(vNode.Value);
                        vNode = vNode.Next;
                    }
                    var result = new FastFindSubStrResult(kv.Key, hsUnique.ToArray(), intOffset);
                    result0.Add(result);
                }
            }
        }

        /// <summary>
        /// 搜索开头子串
        /// </summary>
        /// <param name="str"></param>
        /// <param name="sortMode"></param>
        public ICollection<FastFindSubStrResult> FindStartWith(string str,
            FastFindResultSortMode sortMode = FastFindResultSortMode.ValueLength)
        {
            return FindStartByIndex(str, 0, sortMode);
        }

        /// <summary>
        /// 搜索开头子串，指定位置
        /// </summary>
        /// <param name="str"></param>
        /// <param name="intIndex"></param>
        /// <param name="sortMode"></param>
        public ICollection<FastFindSubStrResult> FindStartByIndex(
            string str,
            int intIndex,
            FastFindResultSortMode sortMode = FastFindResultSortMode.ValueLength)
        {
            if (_charCacheAsc == null) throw new NotSupportedException("Mode需要设置为FastFindIndexMode.Asc才能使用");
            if (str == null || intIndex >= str.Length) return EmptyArray<FastFindSubStrResult>.Array;
            string strValue = str;
            var result = FindByIndex(strValue, _charCacheAsc, intIndex, false, sortMode);

            if (ResultIndexMode != FastFindResultIndexMode.Start)
            {
                ReserveMatchIndex(result, str.Length);
            }

            return result;
        }

        /// <summary>
        /// 搜索结尾子串
        /// </summary>
        /// <param name="str"></param>
        /// <param name="sortMode"></param>
        public ICollection<FastFindSubStrResult> FindEndWith(string str,
            FastFindResultSortMode sortMode = FastFindResultSortMode.ValueLength)
        {
            if (string.IsNullOrEmpty(str)) return EmptyArray<FastFindSubStrResult>.Array;
            return FindEndByIndex(str, str.Length - 1, sortMode);
        }

        /// <summary>
        /// 搜索结尾子串，指定位置
        /// </summary>
        /// <param name="str"></param>
        /// <param name="intEndIndex">最后一个对齐的字符位置</param>
        /// <param name="sortMode"></param>
        public ICollection<FastFindSubStrResult> FindEndByIndex(string str, int intEndIndex,
            FastFindResultSortMode sortMode = FastFindResultSortMode.ValueLength)
        {
            if (_charCacheDesc == null) throw new NotSupportedException("Mode需要设置为FastFindIndexMode.Desc才能使用");
            if (string.IsNullOrEmpty(str)) return EmptyArray<FastFindSubStrResult>.Array;

            //直接把输入的串反过来
            string strValue = new string(str.Reverse().ToArray());
            int intIndex = str.Length - intEndIndex - 1;

            if (intIndex >= str.Length) return EmptyArray<FastFindSubStrResult>.Array;
            var result = FindByIndex(strValue, _charCacheDesc, intIndex, true, sortMode);

            if (ResultIndexMode != FastFindResultIndexMode.End)
            {
                ReserveMatchIndex(result, str.Length);
            }

            return result;
        }

        /// <summary>
        /// 搜索被包含的子串
        /// </summary>
        /// <param name="str"></param>
        /// <param name="sortMode"></param>
        /// <returns></returns>
        public ICollection<FastFindSubStrResult> FindContain(string str,
            FastFindResultSortMode sortMode)
        {
            if (string.IsNullOrEmpty(str)) return EmptyArray<FastFindSubStrResult>.Array;
            var sortMode0 = FastFindResultSortMode.None;
            if (sortMode == FastFindResultSortMode.MaxValueLength
                || sortMode == FastFindResultSortMode.MaxValueLengthEachIndex)
            {
                sortMode0 = sortMode;
            }

            List<FastFindSubStrResult> result = new List<FastFindSubStrResult>();
            if (_charCacheAsc != null)
            {
                for (int i = 0; i < str.Length; i++)
                {
                    var r0 = FindStartByIndex(str, i, sortMode0);
                    result.AddRange(r0);
                }
            }
            else
            {
                for (int i = str.Length - 1; i >= 0; i--)
                {
                    var r0 = FindEndByIndex(str, i, sortMode0);
                    result.AddRange(r0);
                }
            }
            if (sortMode == FastFindResultSortMode.MaxValueLengthEachIndex)
            {
                return result;
            }
            return SortResult(result, sortMode);
        }

        /// <summary>
        /// 搜索相同项
        /// </summary>
        /// <param name="str"></param>
        /// <returns></returns>
        public FastFindSubStrResult FindEqual(string str)
        {
            if (string.IsNullOrEmpty(str)) return null;
            ICollection<FastFindSubStrResult> coll;
            if (_charCacheAsc != null)
            {
                coll = FindStartWith(str, FastFindResultSortMode.None);
            }
            else
            {
                coll = FindEndWith(str, FastFindResultSortMode.None);
            }
            if (coll == null || coll.Count == 0) return null;
            int intLen = str.Length;
            foreach (var item in coll)
            {
                if (item.Value.Length == intLen)
                {
                    return item;
                }
            }
            return null;
        }

        #region 静态方法

        /// <summary>
        /// 字符串对比 判断str1从intStartIndex开始是否与str2在intStr2StartIndex之后的字符完全一致
        /// </summary>
        /// <param name="str">字符串</param>
        /// <param name="intStrStartIndex">str开始匹配的位置</param>
        /// <param name="strSub">子字符串</param>
        /// <param name="intStrSubStartIndex">strSub开始匹配的位置</param>
        /// <param name="blnReserveSubStr">strSub倒置后再匹配</param>
        /// <returns>返回值</returns>
        public static bool MatchSubString(
            string str,
            int intStrStartIndex,
            string strSub,
            int intStrSubStartIndex,
            bool blnReserveSubStr = false)
        {
            if (str.Length - intStrStartIndex < strSub.Length - intStrSubStartIndex)
            {
                //长度不足
                return false;
            }
            //这里从后往前比，后面的更容易匹配不上，效率微微高一点
            if (!blnReserveSubStr)
            {
                int intTmp = intStrStartIndex - intStrSubStartIndex;
                for (int i = strSub.Length - 1; i >= intStrSubStartIndex; i--)
                {
                    if (str[intTmp + i] != strSub[i])
                    {
                        return false;
                    }
                }
            }
            else
            {
                //正常应是这样的
                //int intTmp = intStrStartIndex - intStrSubStartIndex;
                //for (int i = strSub.Length - 1; i >= intStrSubStartIndex; i--)
                //{
                //    if (str[intTmp + i] != strSub[strSub.Length - i - 1])
                //    {
                //        blnSame = false;
                //        break;
                //    }
                //}
                //定义 j = strSub.Length - i - 1;于是 i=strSub.Length - j - 1
                //下面的代码不好理解 是用上面的等式转换过来的
                int intTmp0 = strSub.Length - intStrSubStartIndex - 1;
                int intTmp1 = intTmp0 + intStrStartIndex;
                for (int j = 0; intTmp0 >= j; j++)
                {
                    if (str[intTmp1 - j] != strSub[j])
                    {
                        return false;
                    }
                }
            }
            return true;
        }

        /// <summary>
        /// 倒置FastFindSubStrResult的MatchIndex
        /// </summary>
        /// <param name="result"></param>
        /// <param name="intLen"></param>
        public static void ReserveMatchIndex(
            ICollection<FastFindSubStrResult> result, int intLen)
        {
            foreach (var item in result)
            {
                item.MatchIndex = intLen - item.Value.Length - item.MatchIndex;
            }
        }

        /// <summary>
        /// 对结果进行排序
        /// </summary>
        /// <param name="result"></param>
        /// <param name="sortMode"></param>
        /// <returns></returns>
        public static ICollection<FastFindSubStrResult> SortResult(
            ICollection<FastFindSubStrResult> result,
            FastFindResultSortMode sortMode)
        {
            if (result == null || result.Count <= 1) return result;
            switch (sortMode)
            {
                case FastFindResultSortMode.ValueLength:
                    return result.OrderByDescending(t => t.Value.Length).ToArray();
                case FastFindResultSortMode.ValueLength_MatchIndex:
                    return result.OrderBy(t => t.MatchIndex).OrderByDescending(t => t.Value.Length).ToArray();
                case FastFindResultSortMode.MatchIndex_ValueLength:
                    return result.OrderByDescending(t => t.Value.Length).OrderBy(t => t.MatchIndex).ToArray();
                case FastFindResultSortMode.ValueLength_MatchIndex2:
                    return result.OrderByDescending(t => t.MatchIndex).OrderByDescending(t => t.Value.Length).ToArray();
                case FastFindResultSortMode.MatchIndex2_ValueLength:
                    return result.OrderByDescending(t => t.Value.Length).OrderByDescending(t => t.MatchIndex).ToArray();
                case FastFindResultSortMode.MaxValueLength:
                case FastFindResultSortMode.MaxValueLengthEachIndex:
                    FastFindSubStrResult r = null;
                    int intL1 = 0;
                    int intL2 = 0;
                    foreach (var item in result)
                    {
                        if (r == null)
                        {
                            r = item;
                            intL1 = r.Value.Length;
                            intL2 = r.MatchIndex;
                        }
                        else
                        {
                            int intL1x = item.Value.Length;
                            int intL2x = item.MatchIndex;
                            //优先取长度大 位置靠前的
                            if (intL1x > intL1 || (intL1x == intL1 && intL2x < intL2))
                            {
                                r = item;
                                intL1 = intL1x;
                                intL2 = intL2x;
                            }
                        }
                    }
                    return new FastFindSubStrResult[] { r };
                default:
                    return result;
            }
        }

        /// <summary>
        /// 来自多个FastFindSubStr的结果进行合并
        /// </summary>
        /// <param name="result"></param>
        /// <param name="tagMode">根据TagMode决定不同的合并策略</param>
        /// <returns></returns>
        public static ICollection<FastFindSubStrResult> MergeResult(
            ICollection<FastFindSubStrResult> result,
            FastFindResultTagMode tagMode)
        {
            if (result == null || result.Count <= 1) return result;
            var lookup1 = result.ToLookup(t => t.MatchIndex);
            List<FastFindSubStrResult> resultNew = new List<FastFindSubStrResult>(result.Count);
            bool blnFlag = false;
            foreach (var group1 in lookup1)
            {
                var lookup2 = group1.ToLookup(t => t.Value);
                foreach (var group2 in lookup2)
                {
                    var arr = group2.ToArray();
                    if (arr.Length == 1) resultNew.Add(arr[0]);
                    else
                    {
                        blnFlag = true;
                        resultNew.Add(MergeResultToOne(arr, tagMode));
                    }
                }
            }
            if (blnFlag) return resultNew.ToArray();
            else return result;
        }

        /// <summary>
        /// 来自多个FastFindSubStr的结果进行合并
        /// </summary>
        /// <param name="result"></param>
        /// <param name="tagMode">根据TagMode决定不同的合并策略</param>
        /// <returns></returns>
        internal static FastFindSubStrResult MergeResultToOne(
            ICollection<FastFindSubStrResult> result,
            FastFindResultTagMode tagMode)
        {
            if (tagMode == FastFindResultTagMode.None
                || tagMode == FastFindResultTagMode.Single)
            {
                return result.FirstOrDefault();
            }
            FastFindSubStrResult resultItem = null;
            HashSet<object> hsTags = new HashSet<object>();
            foreach (var item in result)
            {
                if (item == null) continue;
                if (resultItem == null)
                {
                    resultItem = new FastFindSubStrResult(item.Value, null, item.MatchIndex);
                }
                if (item.Tags != null) hsTags.UnionWith(item.Tags);
            }
            if (resultItem != null)
            {
                resultItem.Tags = hsTags.ToArray();
            }
            return resultItem;
        }

        #endregion
    }
}
